`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company: 
// Engineer: 
// 
// Create Date: 2020/07/22 18:54:00
// Design Name: 
// Module Name: fir_filter
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//////////////////////////////////////////////////////////////////////////////////

// Source From CSDN: https://blog.csdn.net/yanshanyan/article/details/82414688
//fir filter
//data width: 16
//filter coeff :-0.0003706  -0.0011344  -0.0018156  -0.0020642  -0.0012814   0.0010011   0.0045544   0.0080695   0.0093324   0.0061059  -0.0025289   -0.014965   -0.026692   -0.031215   -0.022162   0.0040632    0.046173    0.097395     0.14687     0.18274     0.19584     0.18274     0.14687    0.097395    0.046173   0.0040632   -0.022162   -0.031215   -0.026692   -0.014965  -0.0025289   0.0061059   0.0093324   0.0080695   0.0045544   0.0010011  -0.0012814  -0.0020642  -0.0018156  -0.0011344  -0.0003706
//model by crazyalpha (@github)
//email: yunweidz@126.com
 
// Symmetric FIR low-pass filter, transposed form.
//
// Every clock the registered input sample is multiplied by each unique
// coefficient; the products are then added into a chain of registers so that
// the last register of the chain holds the filter output.
//
// Ports:
//   clk      - clock; all registers update on the rising edge.
//   data_in  - input sample, dat_width bits.
//   fil_out  - filter output, dat_width bits (bits [2*dat_width-2 : dat_width-1]
//              of the full-precision accumulator).
//
// Parameters:
//   dat_width - width in bits of samples and coefficients.
//   fil_len   - FIR order (the filter has fil_len+1 taps). The coefficient
//               table below is written out for fil_len = 40 only.
module fir_filter(clk, data_in, fil_out);

  parameter dat_width = 16;
  parameter fil_len = 40;   // FIR order

  input clk;
  input [dat_width-1 : 0] data_in;
  output [dat_width-1 : 0] fil_out;

  // Index of the centre tap. Because the impulse response is symmetric only
  // coefficients 0..half_len are stored and multiplied.
  localparam half_len = (fil_len+1)>>1;

  wire [dat_width-1 :0] coef[half_len :0];

  // Filter coefficients. The values equal the coefficients listed in the file
  // header multiplied by 2^15 and rounded (e.g. 0.19584 * 32768 ~= 6417).
  // Negative literals are truncated to dat_width bits (two's complement).
  assign coef[0] = -12;
  assign coef[1] = -37;
  assign coef[2] = -59;
  assign coef[3] = -68;
  assign coef[4] = -42;
  assign coef[5] = 33;
  assign coef[6] = 149;
  assign coef[7] = 264;
  assign coef[8] = 306;
  assign coef[9] = 200;
  assign coef[10] = -83;
  assign coef[11] = -490;
  assign coef[12] = -875;
  assign coef[13] = -1023;
  assign coef[14] = -726;
  assign coef[15] = 133;
  assign coef[16] = 1513;
  assign coef[17] = 3191;
  assign coef[18] = 4813;
  assign coef[19] = 5988;
  assign coef[20] = 6417;

  // Input latch: register the incoming sample before it fans out to all
  // multipliers.
  reg [dat_width-1 :0] data_tmp;
  always @(posedge clk)
  begin
      data_tmp <= data_in;
  end

  // Multiply the current sample by every unique coefficient.
  // Only half_len+1 multipliers are instantiated (21 for fil_len = 40); the
  // adder chain below reuses each product for the mirrored tap.
  // The multiplier width is tied to dat_width so that overriding dat_width
  // keeps the operand and product widths consistent.
  wire [dat_width*2-1 :0] m_result[half_len :0];
  genvar j;
  generate for(j=0; j<=half_len; j=j+1)
  begin:filter1
      signed_mult #(.datin_width(dat_width)) signed_mult_inst(
          .clk(clk), .dataa(data_tmp), .datab(coef[j]), .result(m_result[j]));
  end
  endgenerate

  // Transposed-form accumulation: each stage is a register holding the
  // previous stage's value plus one product, so there is exactly one
  // 2*dat_width-bit addition between registers.
  // Original author's note: reported to meet 2 ns (500 MHz) for fil_len = 40.
  integer i;
  reg [dat_width*2-1:0] fil_reg[fil_len:0];
  always @(posedge clk)
  begin
      fil_reg[0] <= m_result[0];
      // Stages 1 .. half_len-1 use coefficients in ascending order.
      for(i=1; i<half_len; i=i+1)
      begin
          fil_reg[i] <= fil_reg[i-1] + m_result[i];
      end
      // Stages half_len .. fil_len use the mirrored index (fil_len-i), which
      // runs from the centre tap back down to coefficient 0.
      for(i=half_len; i<=fil_len; i=i+1)
      begin
          fil_reg[i] <= fil_reg[i-1] + m_result[fil_len-i];
      end
  end

  // Output: take the last accumulator stage, drop the top bit and the low
  // dat_width-1 bits, i.e. divide by 2^(dat_width-1) to undo the coefficient
  // scaling.
  wire [dat_width*2-1:0] filout_tmp = fil_reg[fil_len];
  assign fil_out = filout_tmp[dat_width*2-2 : dat_width-1];
endmodule


// module signed_mult(dataa,datab,result);
  // parameter  datin_width = 16;
  // input signed [datin_width-1 :0]dataa, datab;
  // output signed [datin_width*2-1 :0] result;
  // assign result ={dataa } * {datab };
// endmodule   

// Registering the multiplier output lets the design run much faster than the
// combinational version above. It can run at up to 550 MHz in Vivado 2016.2.
// Registered signed multiplier.
//
// Ports:
//   clk    - clock; result updates on the rising edge.
//   dataa  - signed multiplicand, datin_width bits.
//   datab  - signed multiplier, datin_width bits.
//   result - signed product, 2*datin_width bits, one clock after the operands.
//
// Parameters:
//   datin_width - operand width in bits.
module signed_mult(clk,dataa,datab,result);
  parameter  datin_width = 16;
  input clk;
  input signed [datin_width-1 :0] dataa, datab;
  output reg signed [datin_width*2-1 :0] result;

  always @(posedge clk) begin
    // Multiply the signed operands directly. Wrapping them in {} would make
    // them unsigned (a concatenation result is always unsigned in Verilog),
    // which zero-extends negative values and corrupts the upper product bits.
    result <= dataa * datab;
  end
endmodule



  